%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Notebook-wide plot appearance: ggplot theme, wide-and-short default
# figures, sans-serif text.
plt.style.use('ggplot')
plt.rcParams.update({
    'figure.figsize': (15, 3),
    'font.family': 'sans-serif',
})


# Load the 2012 weather observations, indexed by the 'Date/Time' column
# (parse_dates=True asks pandas to parse that index as datetimes).
weather_2012 = pd.read_csv(
    '../data/weather_2012.csv',
    index_col='Date/Time',
    parse_dates=True,
)
weather_2012.head()


# Boolean mask: True for every observation whose weather description
# contains the text 'Snow'.
weather_description = weather_2012.loc[:, 'Weather']
is_snowing = weather_description.str.contains('Snow')


# The raw True/False flag per observation: correct, but not very
# informative to look at directly.
is_snowing.head()

Date/Time
2012-01-01 00:00:00    False
2012-01-01 01:00:00    False
2012-01-01 02:00:00    False
2012-01-01 03:00:00    False
2012-01-01 04:00:00    False
Name: Weather, dtype: bool


# More useful! Plot the flag over time as 0.0 / 1.0.
# The cast is applied inline instead of being assigned back to
# `is_snowing`, so the name keeps referring to the boolean mask built
# above; the later cells perform their own `.astype(float)` where needed.
is_snowing.astype(float).plot()

<matplotlib.axes._subplots.AxesSubplot at 0x114ebaa00>


# Median temperature for each month-end bin, drawn as a bar chart.
# The resampler's built-in median() is used in place of apply(np.median).
weather_2012['Temp (C)'].resample('M').median().plot(kind='bar')

<matplotlib.axes._subplots.AxesSubplot at 0x11511e580>


# The snow flag viewed as floats (True -> 1.0, False -> 0.0); first ten rows.
is_snowing.astype(float).head(10)

Date/Time
2012-01-01 00:00:00    0.0
2012-01-01 01:00:00    0.0
2012-01-01 02:00:00    0.0
2012-01-01 03:00:00    0.0
2012-01-01 04:00:00    0.0
2012-01-01 05:00:00    0.0
2012-01-01 06:00:00    0.0
2012-01-01 07:00:00    0.0
2012-01-01 08:00:00    0.0
2012-01-01 09:00:00    0.0
Name: Weather, dtype: float64


# Fraction of observations in each month whose description mentions snow:
# the mean of the 0.0 / 1.0 flag per month-end bin, computed with the
# resampler's built-in mean() in place of apply(np.mean).
is_snowing.astype(float).resample('M').mean()

Date/Time
2012-01-31    0.240591
2012-02-29    0.162356
2012-03-31    0.087366
2012-04-30    0.015278
2012-05-31    0.000000
2012-06-30    0.000000
2012-07-31    0.000000
2012-08-31    0.000000
2012-09-30    0.000000
2012-10-31    0.000000
2012-11-30    0.038889
2012-12-31    0.251344
Freq: M, Name: Weather, dtype: float64


# Bar chart of the monthly snow fraction (mean of the 0.0 / 1.0 flag per
# month-end bin), using the resampler's built-in mean().
is_snowing.astype(float).resample('M').mean().plot(kind='bar')

<matplotlib.axes._subplots.AxesSubplot at 0x1151708b0>


# Monthly median temperature, named so it becomes a readable column label.
temperature = (
    weather_2012['Temp (C)']
    .resample('M')
    .median()
    .rename("Temperature")
)

# Monthly fraction of observations whose description contains 'Snow'.
# The mask is rebuilt from the raw column so this cell does not depend on
# what earlier cells did to `is_snowing`.
is_snowing = weather_2012['Weather'].str.contains('Snow')
snowiness = (
    is_snowing
    .astype(float)
    .resample('M')
    .mean()
    .rename("Snowiness")
)


# Place the two monthly series side by side in one frame; each series'
# name is used as its column label.
stats = pd.concat([temperature, snowiness], axis='columns')
stats


# Both columns drawn as bars on a single shared axis.
stats.plot.bar()

<matplotlib.axes._subplots.AxesSubplot at 0x1150a57c0>


# One bar-chart panel per column, in a taller figure than the default.
stats.plot.bar(subplots=True, figsize=(15, 10))

array([<matplotlib.axes._subplots.AxesSubplot object at 0x11529cfa0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x1152d0c70>],
      dtype=object)

	Temp (C)	Dew Point Temp (C)	Rel Hum (%)	Wind Spd (km/h)	Visibility (km)	Stn Press (kPa)	Weather
Date/Time
2012-01-01 00:00:00	-1.8	-3.9	86	4	8.0	101.24	Fog
2012-01-01 01:00:00	-1.8	-3.7	87	4	8.0	101.24	Fog
2012-01-01 02:00:00	-1.8	-3.4	89	7	4.0	101.26	Freezing Drizzle,Fog
2012-01-01 03:00:00	-1.5	-3.2	88	6	4.0	101.27	Freezing Drizzle,Fog
2012-01-01 04:00:00	-1.5	-3.3	88	7	4.8	101.23	Fog

6.1 String operations

6.2 Use resampling to find the snowiest month

6.3 Plotting temperature and snowiness stats together

	Temperature	Snowiness
Date/Time
2012-01-31	-7.05	0.240591
2012-02-29	-4.10	0.162356
2012-03-31	2.60	0.087366
2012-04-30	6.30	0.015278
2012-05-31	16.05	0.000000
2012-06-30	19.60	0.000000
2012-07-31	22.90	0.000000
2012-08-31	22.20	0.000000
2012-09-30	16.10	0.000000
2012-10-31	11.30	0.000000
2012-11-30	1.05	0.038889
2012-12-31	-2.85	0.251344